library(glue)

Attaching package: 㤼㸱glue㤼㸲

The following object is masked from 㤼㸱package:dplyr㤼㸲:

    collapse

There were 16 warnings (use warnings() to see them)

Define common columns

cols <- c(
  "Date",
  "Regio",
  "Period",
  "Interval",
  "Population",
  "Deaths",
  "MortalityRate")

Load historical data for flu dataset

cols.historic <-
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
3: In readChar(file, size, TRUE) : truncating string with embedded nuls
4: In readChar(file, size, TRUE) : truncating string with embedded nuls
5: In readChar(file, size, TRUE) : truncating string with embedded nuls
6: In readChar(file, size, TRUE) : truncating string with embedded nuls
7: In readChar(file, size, TRUE) : truncating string with embedded nuls
8: In readChar(file, size, TRUE) : truncating string with embedded nuls
  c(
    cols,
    "Year"
  )

data.historic <- union_all(
  read_rds("../results/data.Rds") %>% select(cols.historic),
  read_rds("../results/data.spatial.Rds") %>% select(cols.historic, "RegioS") %>% mutate("Regio" = RegioS) %>% select(-"RegioS"),
)
Note: Using an external vector in selections is ambiguous.
i Use `all_of(cols.historic)` instead of `cols.historic` to silence this message.
i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
# data.flu2018 <- data.historic %>% 
#   filter(Year == 2018 | Year == 2017) %>% 
#   group_by(Year) %>%
#   mutate(
#     LastPeriod = max(Period, na.rm = T),
#     Period = if_else(Year == 2017 & Period == LastPeriod, 0, Period),
#   ) %>%
#   ungroup() %>%
#   mutate(Year = if_else(Period == 0, 2018, Year)) %>%
#   filter(Year == 2018) %>%
#   rename(Flu2018 = MortalityRate)

Additional RIVM data

Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
data.rivm <- read_rds("../results/rivm.Rds") %>% rename(Rivm = Deaths)

Data for 2020 corona crisis

cols.corona <-
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
3: In readChar(file, size, TRUE) : truncating string with embedded nuls
4: In readChar(file, size, TRUE) : truncating string with embedded nuls
5: In readChar(file, size, TRUE) : truncating string with embedded nuls
6: In readChar(file, size, TRUE) : truncating string with embedded nuls
7: In readChar(file, size, TRUE) : truncating string with embedded nuls
8: In readChar(file, size, TRUE) : truncating string with embedded nuls
  c(
    cols,
    "ExpectedMortality", "AvgMortality", "UnexpectedMortality", "ExcessiveMortality", "Flu2018"
  )

data.corona <- union_all(
  readRDS("../results/data.corona.Rds") %>% select(cols.corona),
  readRDS("../results/data.spatial.corona.Rds") %>% select(cols.corona, "RegioS") %>% mutate("Regio" = RegioS) %>% select(-RegioS)
) %>% 
  # left_join(data.flu2018, by = c("Period", "Regio")) %>%
  full_join(data.rivm, by = c("Regio", "Date")) %>%
  drop_na(Date)
Note: Using an external vector in selections is ambiguous.
i Use `all_of(cols.corona)` instead of `cols.corona` to silence this message.
i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
data.corona %>% filter(Regio == "NL")

Geographical entities

Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
Warning in readChar(file, size, TRUE) :
  truncating string with embedded nuls
areas <- GET("https://opendata.arcgis.com/datasets/e1f0dd70abcb4fceabbc43412e43ad4b_0.geojson") %>% content(as = "text") %>%
  read_sf() %>%
  rename(RegioS = Gemeentecode) %>% 
  arrange(Gemeentenaam) 
No encoding supplied: defaulting to UTF-8.
write_rds(areas, "../results/plots/areas.Rds")

Create a timeline and add splines for smooth charts

data.mort
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
3: In readChar(file, size, TRUE) : truncating string with embedded nuls
4: In readChar(file, size, TRUE) : truncating string with embedded nuls
5: In readChar(file, size, TRUE) : truncating string with embedded nuls
6: In readChar(file, size, TRUE) : truncating string with embedded nuls
7: In readChar(file, size, TRUE) : truncating string with embedded nuls
8: In readChar(file, size, TRUE) : truncating string with embedded nuls

Example 1

tooltip <- ("<b>{Date}</b>
Mortality rate: {floor(Value)}
{if(is.na(Absolute) "" else \"Deaths: {floor(Absolute)}\" %>% glue()}"
Error: unexpected string constant in:
"Mortality rate: {floor(Value)}
{if(is.na(Absolute) "" else \"Deaths: {floor(Absolute)}\" %>% glue()}""

Example 2

Spatial dataset

data.spatial_absolute <- areas %>% left_join(data.corona %>% filter(Interval == "month"), by = c("RegioS" = "Regio")) %>% 
    mutate(
      ActualMortality = round(MortalityRate * Population / 100000),
      ExpectedMortality = round(ExpectedMortality * Population / 100000),
      AvgMortality = round(AvgMortality * Population / 100000),
      UnexpectedMortality = round(UnexpectedMortality * Population / 100000),
      ExcessiveMortality = round(ExcessiveMortality * Population / 100000),
    ) 

write_rds(data.spatial_absolute, "../results/plots/spatial_absolute.Rds")

Example

Summary table

---
title: "Preprocess data for plotting in the dashboard"
output: html_notebook
---

```{r}
library(tidyverse)
library(httr)
library(sf)
library(ggplot2)
library(tmap)
library(plotly)
library(glue)
```

### Define common columns

```{r}
cols <- c(
  "Date",
  "Regio",
  "Period",
  "Interval",
  "Population",
  "Deaths",
  "MortalityRate")
```

### Load historical data for flu dataset

```{r}
cols.historic <-
  c(
    cols,
    "Year"
  )

data.historic <- union_all(
  read_rds("../results/data.Rds") %>% select(cols.historic),
  read_rds("../results/data.spatial.Rds") %>% select(cols.historic, "RegioS") %>% mutate("Regio" = RegioS) %>% select(-"RegioS"),
)

# data.flu2018 <- data.historic %>% 
#   filter(Year == 2018 | Year == 2017) %>% 
#   group_by(Year) %>%
#   mutate(
#     LastPeriod = max(Period, na.rm = T),
#     Period = if_else(Year == 2017 & Period == LastPeriod, 0, Period),
#   ) %>%
#   ungroup() %>%
#   mutate(Year = if_else(Period == 0, 2018, Year)) %>%
#   filter(Year == 2018) %>%
#   rename(Flu2018 = MortalityRate)

```

### Additional RIVM data

```{r}
data.rivm <- read_rds("../results/rivm.Rds") %>% rename(Rivm = Deaths)
```

### Data for 2020 corona crisis

```{r}
cols.corona <-
  c(
    cols,
    "ExpectedMortality", "AvgMortality", "UnexpectedMortality", "ExcessiveMortality", "Flu2018"
  )

data.corona <- union_all(
  readRDS("../results/data.corona.Rds") %>% select(cols.corona),
  readRDS("../results/data.spatial.corona.Rds") %>% select(cols.corona, "RegioS") %>% mutate("Regio" = RegioS) %>% select(-RegioS)
) %>% 
  # left_join(data.flu2018, by = c("Period", "Regio")) %>%
  full_join(data.rivm, by = c("Regio", "Date")) %>%
  drop_na(Date)

data.corona %>% filter(Regio == "NL")
```

### Geographical entities

```{r}
areas <- GET("https://opendata.arcgis.com/datasets/e1f0dd70abcb4fceabbc43412e43ad4b_0.geojson") %>% content(as = "text") %>%
  read_sf() %>%
  rename(RegioS = Gemeentecode) %>% 
  arrange(Gemeentenaam) 

write_rds(areas, "../results/plots/areas.Rds")
```

### Create a timeline and add splines for smooth charts

```{r}
# Do some interpolation
dates <- data.corona %>% drop_na("Period", "Date") %>% distinct(Date) %>% pull("Date")
fields <- c("MortalityRate", "ExpectedMortality", "AvgMortality", "Flu2018", "DeathsUnexpected", "DeathsAboveAvg", "Rivm", "Deaths")
days <- data.frame(Date = seq(min(dates), to = max(dates), by = "days")) %>%
  crossing(data.corona %>% select(Regio, Interval) %>% distinct()) %>%
  crossing(fields) %>%
  set_names(c("Date", "Regio", "Interval", "Variable"))

data.mort <- data.corona %>% 
  mutate(
    DeathsUnexpected = UnexpectedMortality * Population / 100000,
    DeathsAboveAvg = ExcessiveMortality * Population / 100000,
  ) %>%
  gather("Variable", "Value", fields) %>%
  drop_na(Regio, Variable, Date) %>%
  full_join(days) %>%
  group_by(Regio, Interval, Variable) %>%
  filter(sum(!is.na(Value)) > 0) %>%
  # Add dates of first and last observation to only interpolate.
  mutate(
    Date2 = if_else(is.na(Value), as.Date(NA), Date),
    MinDate = min(Date2, na.rm = T),
    MaxDate = max(Date2, na.rm = T)
  ) %>%
  filter(Date >= MinDate & Date <= MaxDate) %>%
  mutate(
    Interpolated = approx(Date, Value, xout=Date)$y,
    Absolute = if_else(Variable == "MortalityRate", Value * Population / 100000, as.double(NA))
  ) %>%
  ungroup() %>%
  select("Regio", "Date", "Period", "Interval", "Variable", "Value", "Interpolated", "Absolute")

data.mort %>% filter(Regio == "NL", Variable == "Rivm") %>% arrange(Date)

write_rds(data.mort, "../results/plots/mortovertime.Rds")
```

#### Example 1

```{r purl=F}
x <- data.mort %>%
  filter(Variable %in% c("MortalityRate", "ExpectedMortality", "AvgMortality", "Flu2018")) %>%
  filter(Regio == "NL")

tooltip <- ("<b>{Date}</b>
Mortality rate: {floor(Value)}
{Deaths: {floor(Absolute)}"
)

ggplotly(tooltip = "text", ggplot(
      x,
      aes(
        group = Variable,
        color = Variable,
        text = tooltip %>% glue()
      )
    ) +
      geom_point(aes(x = Date, y = Value)) +
      geom_line(aes(x = Date, y = Interpolated)) +
      labs(x = "Period (weeks)" %>% glue(), y = "Deaths by 100.000 inhabitants") +
      theme_minimal() +
      theme(legend.title = element_blank())
)
```

#### Example 2

```{r purl=F}
x <- data.mort %>% 
  filter(Variable %in% c("Rivm", "DeathsAboveAvg", "DeathsUnexpected")) %>%
  filter(Regio == "NL")


tooltip <- ("<b>{Date}</b>
Deaths: {floor(Interpolated)}")

ggplotly(ggplot(
  x,
  aes(
    group = Variable,
    color = Variable,
    text =  tooltip %>% glue()
  )
) +
  geom_line(aes(x = Date, y = Interpolated)) +
  geom_point(aes(x = Date, y = Value)) +
  labs(x = "Period (weeks)" %>% glue(), y = "Absolute deaths") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  scale_color_brewer(palette = "Set2"),

  tooltip = "text"
)
```

### Spatial dataset

```{r}
data.spatial_absolute <- areas %>% left_join(data.corona %>% filter(Interval == "month"), by = c("RegioS" = "Regio")) %>% 
    mutate(
      ActualMortality = round(MortalityRate * Population / 100000),
      ExpectedMortality = round(ExpectedMortality * Population / 100000),
      AvgMortality = round(AvgMortality * Population / 100000),
      UnexpectedMortality = round(UnexpectedMortality * Population / 100000),
      ExcessiveMortality = round(ExcessiveMortality * Population / 100000),
    ) 

write_rds(data.spatial_absolute, "../results/plots/spatial_absolute.Rds")
```

#### Example

```{r}
tm_shape(data.spatial_absolute %>% filter(Period == 4)) +
  tm_polygons(
    col = "MortalityRate",
    id = "Gemeentenaam",
    title = "Mortality rate by municipality"
  )
```
### Summary table

```{r}
year_high_mort <- data.historic %>% group_by(Regio, Year) %>% 
  summarise(MortalityRate = mean(MortalityRate)) %>% 
  arrange(desc(MortalityRate)) %>% mutate(Variable = "YearHighestMort", Value = Year %>% as.character()) %>% 
  select(Variable, Value, Regio) %>%
  slice(1)

data.summary <- data.mort %>% 
      filter(Period > 0 & Variable %in% c("Deaths", "Rivm", "DeathsAboveAvg", "DeathsUnexpected")) %>%
      union_all(year_high_mort) %>%
      group_by(Regio, Variable) %>%
      summarise(Value = ceiling(sum(Value, na.rm = T))) %>%
      ungroup()

write_rds(data.summary, "../results/plots/summary.Rds")

data.summary %>% filter(Regio == "GM0345") %>% spread(Variable, Value)
```

